libname red "/projects/data/F5500/microdata";
libname blue "/.../data";
libname icf "/.../lehd_s2014_icf/2014";
libname temp "/.../data/temp2";

%macro statelib(st);
	libname ecft26&st "/.../lehd_s2014_ecft26/&st";
	libname ecf&st "/.../lehd_s2014_ecf/&st";
	libname ehf&st "/.../lehd_s2014_ehf/&st";
%mend statelib;

%statelib(ak); %statelib(ar); %statelib(az); %statelib(ca); %statelib(co); 
%statelib(ct); %statelib(dc); %statelib(de); %statelib(fl); %statelib(ga); 
%statelib(hi); %statelib(ia); %statelib(id); %statelib(il); %statelib(in); 
%statelib(ks); %statelib(ky); %statelib(la); %statelib(ma); %statelib(md); 
%statelib(me); %statelib(mi); %statelib(mn); %statelib(mo); %statelib(ms);
%statelib(mt); %statelib(nc); %statelib(nd); %statelib(ne); %statelib(nh);
%statelib(nj); %statelib(nm); %statelib(nv); %statelib(ny); %statelib(oh);
%statelib(ok); %statelib(or); %statelib(pa); %statelib(ri); %statelib(sc);
%statelib(sd); %statelib(tn); %statelib(tx); %statelib(ut); %statelib(va);
%statelib(vt); %statelib(wa); %statelib(wi); %statelib(wv); %statelib(wy);

%include "/.../programs/includes/format_source_2000.sas";
%include "/.../programs/includes/format_source_2001.sas";
%include "/.../programs/includes/format_source_2002.sas";
%include "/.../programs/includes/format_source_2004.sas";
%include "/.../programs/includes/format_source_2005.sas";
%include "/.../programs/includes/format_source_2014.sas";

/**********************************************************************
* Author : Dhiren Patki
*
* This program links a clean set of DB-sponsoring employers to the 
* LEHD files. A panel dataset of DB sponsors is produced by 
* 03_createfirmpanel.do. 
*
* The goal of this program is to obtain information on the age structure
* of firms in the panel. 	
***********************************************************************/

%let states=ak ar az ca co ct dc de fl ga hi ia id il in ks ky
la ma md me mi mn mo ms mt nc nd ne nh nj nm nv ny oh ok or pa ri sc
sd tn tx ut va vt wa wi wv wy;
%let numstates=50;

/*Clean up temp directory*/
proc datasets lib=temp nolist kill;
quit;
run;

/*Pull in set of firm-years from firm panel (for pre-freeze years only use _pre)*/
data blue.unqfirmspanel;
	infile "/.../data/unqfirmspanel.csv" delimiter = ',' missover dsd lrecl=/*redacted*/ firstobs=/*redacted*/; 
	informat firmid /*redacted*/;
	informat year /*redacted*/;
	format firmid /*redacted*/;
	format year /*redacted*/;
	input firmid $
	      year;
run;


%macro buildpanel;

	%do i=1 %to &numstates.;

	/*Declare "thisstate" (picking a state from the list of possible states)*/
	%let thisstate=%scan(&states.,&i.);

	/*-------------------------------------------------------------------------------
	* Step 1: Get SEIN's associated with the firmids on the DB employer file
	* using the the ECFT26 file. A firmid can map to multiple sein's in a given 
	* year. 									
	*-------------------------------------------------------------------------------*/

	/*Next, merge with the ecft26 by firmid pulling all the sein's associated with the firmid's in "thisstate"*/
	proc sql;
		create table dbecfs_&thisstate as 
		select a.firmid, b.sein, b.year, b.fas_firm_id
		from blue.unqfirmspanel as a inner join
		ecft26&thisstate..ecf_&thisstate._t26 as b
		on a.firmid = b.fas_firm_id & a.year = b.year;
	quit;
	
	/*De-dup by sein-year (gets rid of seinunit and quarter duplications)*/
	proc sort data = dbecfs_&thisstate nodupkey;
		by sein year;
	run;

	/*-------------------------------------------------------------------------------
	* Step 2: Using the sein's obtain all workers (pik's) who worked for DB sponsors
	* by merging with the EHF (this is a sein-pik panel)	
	*-------------------------------------------------------------------------------*/
	
	/*Merge with the EHF (gives all pik-years in "thisstate")*/
	proc sql;
		create table temp.ee_link_&thisstate as 
		select a.firmid,a.sein,a.year,b.pik,b.source
		from dbecfs_&thisstate as a inner join
		ehf&thisstate..ehf_&thisstate as b 
		on a.sein = b.sein & a.year = b.year;
	quit;

	/*Clean up the working directory*/
	proc datasets lib=work nolist kill;
	quit;
	run;

	%end;
	
%mend;

%buildpanel;


/*-------------------------------------------------------------------------------
* Step 3: Stack the state-level files 
*-------------------------------------------------------------------------------*/

data ee_panel0;
	set temp.ee_link_ak temp.ee_link_ar temp.ee_link_az
	     temp.ee_link_ca temp.ee_link_co temp.ee_link_ct temp.ee_link_dc
	     temp.ee_link_de temp.ee_link_fl temp.ee_link_ga temp.ee_link_hi 
	     temp.ee_link_ia temp.ee_link_id temp.ee_link_il temp.ee_link_in
	     temp.ee_link_ks temp.ee_link_ky temp.ee_link_la temp.ee_link_md
	     temp.ee_link_me temp.ee_link_mi temp.ee_link_mn temp.ee_link_mo
	     temp.ee_link_ms temp.ee_link_mt temp.ee_link_nc temp.ee_link_nd
	     temp.ee_link_ne temp.ee_link_nh temp.ee_link_nj temp.ee_link_nm
	     temp.ee_link_nv temp.ee_link_ny temp.ee_link_oh temp.ee_link_ok
	     temp.ee_link_or temp.ee_link_pa temp.ee_link_ri temp.ee_link_sc
	     temp.ee_link_sd temp.ee_link_tn temp.ee_link_tx temp.ee_link_ut
	     temp.ee_link_va temp.ee_link_vt temp.ee_link_wa temp.ee_link_wi
	     temp.ee_link_wv temp.ee_link_wy temp.ee_link_ma;
	if year = 2015 then delete; /*drop the first qtr of 2015 which is in the s2014 LEHD*/
run;

/*Clean up base state level files*/
proc datasets lib=temp nolist;
	delete ee_link_ak ee_link_ar ee_link_az
	     ee_link_ca ee_link_co ee_link_ct ee_link_dc
	     ee_link_de ee_link_fl ee_link_ga ee_link_hi 
	     ee_link_ia ee_link_id ee_link_il ee_link_in
	     ee_link_ks ee_link_ky ee_link_la ee_link_md
	     ee_link_me ee_link_mi ee_link_mn ee_link_mo
	     ee_link_ms ee_link_mt ee_link_nc ee_link_nd
	     ee_link_ne ee_link_nh ee_link_nj ee_link_nm
	     ee_link_nv ee_link_ny ee_link_oh ee_link_ok
	     ee_link_or ee_link_pa ee_link_ri ee_link_sc
	     ee_link_sd ee_link_tn ee_link_tx ee_link_ut
	     ee_link_va ee_link_vt ee_link_wa ee_link_wi
	     ee_link_wv ee_link_wy ee_link_ma;
quit;
run;


/*Add info from ICF*/
proc sort data = ee_panel0;
by pik;
run;

data blue.ee_panel_all (drop=dob); /*keep this file for disclosure stats*/
    merge icf.icf_us (in=a keep=dob pik) ee_panel0 (in=b);
    by pik;
    if a and b;
    yob = year(dob);
run;

/*Clean up*/
proc datasets lib=work nolist;
  delete ee_panel0;
run;

/*-------------------------------------------------------------------------------
* Create variables describing the age structure of employment at each 
* firmid								
*-------------------------------------------------------------------------------*/

/*Create age bin flags*/
data temp.ee_panel1;
	set blue.ee_panel_all;
	age = year - yob;
	age_lte25=0;
	age_26_30=0;
	age_31_35=0;
	age_36_40=0;
	age_41_45=0;
	age_46_50=0;
	age_51_55=0;
	age_56_60=0;
	age_61_65=0;
	age_66_70=0;
	age_gte71=0;
	if age<=25 then age_lte25=1;
	if age>=26 & age<=30 then age_26_30=1;
	if age>=31 & age<=35 then age_31_35=1;
	if age>=36 & age<=40 then age_36_40=1;
	if age>=41 & age<=45 then age_41_45=1;
	if age>=46 & age<=50 then age_46_50=1;
	if age>=51 & age<=55 then age_51_55=1;
	if age>=56 & age<=60 then age_56_60=1;
	if age>=61 & age<=65 then age_61_65=1;
	if age>=66 & age<=70 then age_66_70=1;
	if age>=71 then age_gte71=1;
	cnt=1; /*count is for total # of workers (denominator)*/
run;

/*Collapse to firm - year level*/
proc sql;
	create table firmagestruc as 
	select firmid, year, sum(cnt) as lehd_size, sum(age_lte25) as tot_lte25,
	sum(age_26_30) as tot_26_30, sum(age_31_35) as tot_31_35,
	sum(age_36_40) as tot_36_40, sum(age_41_45) as tot_41_45,
	sum(age_46_50) as tot_46_50, sum(age_51_55) as tot_51_55,
	sum(age_56_60) as tot_56_60, sum(age_61_65) as tot_61_65,
	sum(age_66_70) as tot_66_70, sum(age_gte71) as tot_gte71
	from temp.ee_panel1
	group by firmid, year;
quit;

proc datasets lib=temp nolist;
	delete ee_panel1;
quit;
run;

proc sort data = firmagestruc nodupkey;
	by firmid year;
run;

data firmagestruc (drop = tot_lte25 tot_26_30 tot_31_35 
	tot_36_40 tot_41_45 tot_46_50 tot_51_55 tot_56_60 
	tot_61_65 tot_66_70 tot_gte71);
	set firmagestruc;
	frac_lte25 = tot_lte25/lehd_size;
	frac_26_30 = tot_26_30/lehd_size;
	frac_31_35 = tot_31_35/lehd_size;
	frac_36_40 = tot_36_40/lehd_size;
	frac_41_45 = tot_41_45/lehd_size;
	frac_46_50 = tot_46_50/lehd_size;
	frac_51_55 = tot_51_55/lehd_size;
	frac_56_60 = tot_56_60/lehd_size;
	frac_61_65 = tot_61_65/lehd_size;
	frac_66_70 = tot_66_70/lehd_size;
        frac_gte71 = tot_gte71/lehd_size;
run;

/*Export to csv*/
proc export data = firmagestruc
	outfile = "/.../data/firmagestruc_a.csv"
	dbms = csv replace;
run;

proc datasets lib=work nolist;
	delete firmagestruc;
quit;
run;

/*Clean up temp directory*/
proc datasets lib=temp nolist kill;
quit;
run;